/* File: explore_acquisition_data.do
 * Author: Luca Maini
 *
 * Purpose: calculates some summary stats about deals, and creates datasets with
 *			information about deal characteristics for the analysis. The 
 *			datasets we need for the analysis are:
 *			
 *				1.	drug_overlap_measures: this contains drug-level indicators 
 *					for whether the drug is involved in a deal that increases
 *					consolidation in a therapeutic area (either ATC3 or ATC4)
 *				2.	acquisitions_characteristics: has deal-level indicators for
 *					type of deal
 *				3.	drug_acq_in_class: this contains the competitors of acquired
 *					drugs and drugs involved in overlap mergers. NOTE that the 
 *					dataset does not yet exclude drugs that are part of the 
 *					deal. This filter is applied later.
 *				4.	NO LONGER ACTIVE acquirer_activity_data: counts number of 
 *					products acquired and number of products acquired within 
 *					specific deal categories
 *
 * Date Created: 04/20/2021
 *
 */

/* 	In this script we extract a series of indicator variables for how various 
	drugs are affected by different deals. Our indicators fall into two 
	categories: 
		
		i)	indicators about ACQUIRED drugs, and 
		ii) indicator about drugs that belong to the ACQUIRING company. We call 
			these latter drugs ACQUIRER drugs. 
	
	We further distinguish two types of indicators:
		
		i)	indicators about deal type (these are the same for all drugs 
			involved in the deal), and
		ii) indicators about a drug's standing in the deal (these can be 
			different for different drugs in the deal).
	
	At this stage, the only indicator that differs by drug characteristics is 
	the overlapping indicator, which isolates drugs whose therapeutic class 
	overlaps with the therapeutic class of another drug that is involved in the 
	merger, and belongs to the other company.

*/

/////////////////////////////////////////////////////////////////////////
/////																/////
/////				STEP 0A. Preliminary dataset prep				/////
/////																/////
/////////////////////////////////////////////////////////////////////////

* Early termination information
import excel "${inputdir}\early_termination_indicator.xlsx", ///
	sheet("Sheet1") firstrow clear

drop if dealID == .	// empty lines imported from the excel file
rename early_termination_ind ea_ind

save "${outdir}\early_termination_ind.dta", replace



/////////////////////////////////////////////////////////////////////////
/////																/////
/////	STEP 0B. Get list of drugs and firms involved in each deal	/////
/////																/////
/////////////////////////////////////////////////////////////////////////

* In the acquisition data, we only note the products that were ACQUIRED in the 
* deal. Conversely, in the sales data we do not mark products of the ACQUIRING
* company directly. So, in this script we create lists of companies and drugs
* that are involved in the deal in various capacities.

* 1. List of ACQUIRED companies and the products that were acquired (this is 
* just a subset of variables already in the acquisition dataset.
use "${maindir}\acquisitionsClean_selected.dta", clear

keep dealID acquisitionQrt TargetName Product

save "${outdir}\deal_target_list.dta", replace


* Now do the same for Acquirors. This is a bit more complicated because it 
* requires merging to the sales data.
use "${maindir}\acquisitionsClean_selected.dta", clear
keep dealID acquisitionQrt AcquirorName
duplicates drop

* Reshape to account for the few cases with multiple deals in the same quarter
bysort AcquirorName acquisitionQrt : gen seq = _n
reshape wide dealID, i(AcquirorName acquisitionQrt) j(seq)

* Now, we want to select products that: (i) belonged to the acquiring company in
* the quarter BEFORE the acquisition, and (ii) still belong to the company in
* the quarter when the acquisition took place. To ensure both conditios are
* satisfied we create two datasets, one with acquirer, quarter and dealID, and
* another one that is identical, but with a quarter transposed back by one. We
* then merge each separately onto the sales dataset and append the resulting 
* merges.

* prep variable names for merge
gen mktCompany = AcquirorName
rename acquisitionQrt quarter	// for merging purposes

tempfile acqQrt
save "`acqQrt'", replace

** Now move quarter back one
replace quarter = quarter - 1

tempfile prevQrt
save "`prevQrt'", replace

* now do the merges
use "`acqQrt'", clear

* get list of products in the acquisition quarter (companies that do not have in
* the quarter when the acquisition takes place will show up with missing product 
* names)
merge 1:m mktCompany quarter ///
	using "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", ///
		nogen keep(master match) keepusing(Product)
drop if Product == ""
tempfile acqQrt_merged
save "`acqQrt_merged'", replace


use "`prevQrt'", clear

* get list of products in the previous quarter (companies that do not have in
* the quarter before the acquisition takes place will show up with missing
* product names)
merge 1:m mktCompany quarter ///
	using "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", ///
		nogen keep(master match) keepusing(Product)
drop if Product == ""
append using "`acqQrt_merged'"

* keep only observations that are matched twice
bysort Product AcquirorName dealID* : gen tot = _N
drop if tot == 1

* drop observation for the previous quarter
bysort Product AcquirorName dealID* (quarter) : keep if _n == 2

* some deals will not be matched. That means that the company did not have 
* products with positive sales in the data before the acquisition (or after).

* reshape back
reshape long dealID, i(AcquirorName quarter Product) j(seq)
drop seq
drop if dealID == .

rename quarter acquisitionQrt 

keep dealID acquisitionQrt AcquirorName Product
order dealID acquisitionQrt AcquirorName Product

* save
save "${outdir}\deal_acquiror_list.dta", replace




////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////







/////////////////////////////////////////////////
/////										/////
/////	STEP 1. Get deal characteristics	/////
/////										/////
/////////////////////////////////////////////////

/*	We create the following variables as deal characteristics:
	
		1. 	COMPANY_ACQ -> this is an indicator for a company acquisition 
			(instead of an asset acquisition)
		2. 	LARGER_ACQUIROR -> this is an indicator for whether the acquiring 
			company is larger than the acquired company. We measure size using
			net sales in the quarter prior to the acquisition.
		3. 	LARGE_ACQUISITION -> this is an indicator for whether the future 
			stream of sales of assets involved in the deal is above 500 million
			USD (this threshold is chosen arbitrarily)
		4.	PORTFOLIO_LOSS/GAIN -> this is an indicator for a deal that resulted
			in a large contraction or expansion for the portfolio of the firm
			involved.

	In order to create these variables we need to grab information about sales 
	from the sales dataset first.
	
	One issue with these measures is that a handful of products are co-marketed.
	We assign these products to ALL companies involved in the co-marketing. In 
	order to do so appropriately, we have to temporarily add an extra 
	observation for those products in the data. We then return the dataset to 
	the drug-year level. Co-marketed products are assigned the MAX of each
	variable.
	
*/



* 1. We get rolling yearly net sales
* Open sales data (just to grab yearly-level net sales)
use "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", clear

* reshape to get single company names
rename mktCompany Company
split Company, parse(;)
foreach var of varlist Company* {
	replace `var' = trim(`var')
	}

rename Company mktCompany

reshape long Company, i(Product mktCompany quarter) j(companyTracker)
drop if Company == ""
drop companyTracker mktCompany


* get total net sales
bysort Company quarter : egen tot_firm_net_sales = total(net_sales)

* save
keep Company quarter tot_firm_net_sales
duplicates drop

bysort Company (quarter) : gen tot_firm_net_sales_yr_rolling = ///
	tot_firm_net_sales + tot_firm_net_sales[_n-1] + ///
	tot_firm_net_sales[_n-2] + tot_firm_net_sales[_n-3]
* adjust for first three observations
bysort Company (quarter) : replace tot_firm_net_sales_yr_rolling = ///
	tot_firm_net_sales + tot_firm_net_sales[_n-1] + ///
	tot_firm_net_sales[_n-2] if _n == 3
bysort Company (quarter) : replace tot_firm_net_sales_yr_rolling = ///
	tot_firm_net_sales + tot_firm_net_sales[_n-1] if _n == 2
bysort Company (quarter) : replace tot_firm_net_sales_yr_rolling = ///
	tot_firm_net_sales if _n == 1

replace quarter = quarter + 1	// want to use sales of quarter before the deal
drop tot_firm_net_sales
rename tot_firm_net_sales_yr_rolling tot_firm_net_sales

save "${outdir}/firm_total_sales.dta", replace




* 2. total sales of each product from each acquisition and going forward
use "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", clear
drop if quarter >= qofd(LOE)	// exclude products post-exclusivity

* get total sales after acquisition and for the first year after acquisition. 
* We loop over each acquisition (there are up to 4).
forvalues i = 1/4 {
	bysort Product (quarter) : egen tot_drug_postAcq_net_sales`i' = ///
		total(net_sales * (quarter >= acquisitionQrt`i'))
	
	bysort Product (quarter) : egen year1_drug_postAcq_net_sales`i' = ///
		total(net_sales * (quarter >= acquisitionQrt`i' & ///
						   quarter < acquisitionQrt`i' + 4))
	
	replace year1_drug_postAcq_net_sales`i' = -99 ///
		if acquisitionQrt`i' > tq(2019q1)	// need one full year of data
	}

* generate a single variable for total net sales post-deal. Also assign a dealID
* to each product in the right quarter, as well as an acquisitionQrt.
gen tot_drug_postAcq_net_sales = .
gen year1_drug_postAcq_net_sales = .
gen dealID = .

forvalues i = 1/4 {
	replace tot_drug_postAcq_net_sales = tot_drug_postAcq_net_sales`i' ///
		if quarter == acquisitionQrt`i'
	replace year1_drug_postAcq_net_sales = year1_drug_postAcq_net_sales`i' ///
		if quarter == acquisitionQrt`i'
	replace dealID = dealID`i' if quarter == acquisitionQrt`i'
	}

* now collapse to the deal level to get the total future revenue stream of all
* assets involved
collapse (sum) tot_drug_postAcq_net_sales year1_drug_postAcq_net_sales, ///
	by(dealID)
drop if dealID == .

save "${outdir}/deal_postAcq_sales.dta", replace



* 3. Now get the dealIDs for acquisitions resulting in a large portfolio gain or 
*	 loss.

* first, get clean list of just deals and companies involved. One list for 
* targets, one for acquirers. To merge in later.
use "${maindir}\acquisitionsClean_selected.dta", clear
keep dealID TargetName AcquirorName acquisitionQrt
duplicates drop

* Acquirer list
preserve
drop TargetName
duplicates drop
rename dealID dealID_acqr
bysort AcquirorName acquisitionQrt : gen n = _n
reshape wide dealID_acqr, i(AcquirorName acquisitionQrt) j(n)
rename AcquirorName Company
rename acquisitionQrt quarter

tempfile Acquirers
save `Acquirers', replace

restore

* Targets
drop AcquirorName
duplicates drop
rename dealID dealID_acqd
bysort TargetName acquisitionQrt : gen n = _n
reshape wide dealID_acqd, i(TargetName acquisitionQrt) j(n)
rename TargetName Company
rename acquisitionQrt quarter

tempfile Targets
save `Targets', replace


* Now open sales data
use "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", clear

* duplicate observations for drugs that are associated to more than one company
rename mktCompany Company
split Company, parse(;)
foreach var of varlist Company* {
	replace `var' = trim(`var')
	}

drop Company

reshape long Company, i(Product quarter) j(companyTracker)
drop if Company == ""


* get total net sales by company and quarter
bysort Company quarter : egen tot_firm_net_sales = total(net_sales)

* preserve so we can merge back portfolio indicators to a dataset at the 
* Product-quarter level
preserve

* collapse to company level
keep Company quarter tot_firm_net_sales
duplicates drop

* Merge with info about deals
merge 1:1 Company quarter using `Acquirers', keep(master match) nogen
merge 1:1 Company quarter using `Targets', keep(master match) nogen

* Now calculate changes in the various portfolio measures
bysort Company (quarter) : gen perc_diff_tot_firm_net_sales = ///
	(tot_firm_net_sales-tot_firm_net_sales[_n-1])/tot_firm_net_sales[_n-1]

* We identify deals such that change in net sales results in a top (or bottom) 
* 10% change
qui sum perc_diff_tot_firm_net_sales if dealID_acqr1 != ., d
local qAtot_firm_net_sales = r(p90)
qui sum perc_diff_tot_firm_net_sales if dealID_acqd1 != ., d
local qTtot_firm_net_sales = r(p10)

gen portfolio_gain = ///
	perc_diff_tot_firm_net_sales > `qAtot_firm_net_sales' & ///
	perc_diff_tot_firm_net_sales < . & ///
	dealID_acqr1 != .
	
gen portfolio_loss = ///
	perc_diff_tot_firm_net_sales < `qTtot_firm_net_sales' & ///
	dealID_acqd1 != .

* keep only Company, quarter and indicators, and save
keep Company quarter portfolio_loss portfolio_gain
keep if portfolio_loss == 1 | portfolio_gain == 1

save "${outdir}/portfolio_metrics_company.dta", replace

* merge to match to all products owned by the acquiring company (in case of a 
* portfolio gain), and to products remaining to the target company (in case of a
* portfolio loss)restore
rename Company AcquirorName
rename quarter acquisitionQrt
merge 1:m AcquirorName acquisitionQrt ///
	using "${outdir}/deal_acquiror_list.dta", keep(match) nogen

* save as a temporary file
keep AcquirorName acquisitionQrt Product portfolio_gain portfolio_loss
rename AcquirorName Company
rename acquisitionQrt quarter
duplicates drop

tempfile portfolio_gains
save `portfolio_gains', replace

* merge for portfolio loss
restore
merge m:1 Company quarter ///
	using "${outdir}/portfolio_metrics_company.dta", keep(match) nogen

keep if portfolio_loss == 1
keep Company quarter Product portfolio_gain portfolio_loss
duplicates drop

* append with the previous dataset and save
append using `portfolio_gains'
save "${outdir}/portfolio_metrics.dta", replace



* 4. Get age of each product at the time of acquisition, time to LOE, and create
*	 a indicator for product close to end of life-cycle

use "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", clear

drop if acquisitionQrt1 == .

*** calculate mkt_start date
gen start_year = yofd(mkt_start)

*** calculate mkt_end year (the earliest of LOE or mkt_end)
gen end_year = yofd(LOE)
replace end_year = yofd(mkt_end) if yofd(LOE) > yofd(mkt_end) // this also fixes missing values of LOE

* gen age and time to mkt_end at acquisition
forvalues n = 1/4 {
	gen age_at_acq`n' = yofd(dofq(acquisitionQrt`n')) - start_year
	gen yrs_to_mkt_end_at_acq`n' = end_year - yofd(dofq(acquisitionQrt`n'))
	gen old_prod_flag`n' = (yrs_to_mkt_end_at_acq`n' <= 3) | ///
		(yrs_to_mkt_end_at_acq`n' == . & age_at_acq`n' >= 12 & age_at_acq`n' < .)
	}

* reshape to get a dataset at the Product-dealID level
keep Product dealID* acquisitionQrt* old_prod_flag*
duplicates drop
reshape long dealID acquisitionQrt old_prod_flag, i(Product) j(temp)

drop temp
drop if dealID == .

bysort dealID : egen all_old = min(old_prod_flag)
bysort dealID : egen perc_old = mean(old_prod_flag)

* now keep only dealID-level variables (might go back at some point)
keep dealID all_old perc_old
duplicates drop
save "${outdir}/old_prod_flag.dta", replace



* 5. Now get the company acquisition indicator as well and combine with the 
* 	 variables created earlier.
use "${maindir}\acquisitionsClean_selected.dta", clear

* Company acquisition indicator
gen company_acq = dealCategory == "COMPANY ACQUISITION"

keep dealID AcquirorName TargetName company_acq acquisitionQrt ///
	ValueofTransactionMil num_products

duplicates drop	// gets rid of drug-specific information


** Merge with future revenue stream info
merge 1:1 dealID using "${outdir}/deal_postAcq_sales.dta", nogen

* unmatched observations have no drugs that earn future revenue in our data
replace tot_drug_postAcq_net_sales = 0 if tot_drug_postAcq_net_sales == .
replace year1_drug_postAcq_net_sales = 0 if year1_drug_postAcq_net_sales == .


** Merge with info about company size, R&D and ad indicators
gen quarter = acquisitionQrt

*** First do Acquiror (NOTE: the reason we can simply merge the single company
*** name onto the deal dataset is that acquisitions and targets are always 
*** single companies).
gen Company = AcquirorName
merge m:1 Company quarter using "${outdir}/firm_total_sales.dta", ///
	keep(master match) nogen

* unmatched observations earn no revenue in our data prior to the deal UNLESS 
* the deal occurred in 2007
replace tot_firm_net_sales = 0 if tot_firm_net_sales == .
replace tot_firm_net_sales = . if yofd(dofq(acquisitionQrt)) == 2007
rename tot_firm_net_sales tot_Acquiror_net_sales 

drop Company

*** Next do Target
gen Company = TargetName
merge m:1 Company quarter using "${outdir}/firm_total_sales.dta", ///
	keep(master match) nogen

* unmatched observations earn no revenue in our data prior to the deal UNLESS 
* the deal occurred in 2007
replace tot_firm_net_sales = 0 if tot_firm_net_sales == .
replace tot_firm_net_sales = . if yofd(dofq(acquisitionQrt)) == 2007
rename tot_firm_net_sales tot_Target_net_sales 

drop Company quarter


** Generate indicator for large acquisition
sum year1_drug_postAcq_net_sales if year1_drug_postAcq_net_sales >= 0, d
gen large_deal = year1_drug_postAcq_net_sales > r(p50)

** Generate indicator for larger acquirer
gen lg_acq = tot_Acquiror_net_sales > tot_Target_net_sales & tot_Acquiror_net_sales < .
replace lg_acq = 1 if company_acq == 1

gen sm_acq = 1 - lg_acq
replace sm_acq = 0 if tot_Acquiror_net_sales == . | tot_Target_net_sales == .

** Generate indicator for Valeant
gen valeant = AcquirorName == "VALEANT PHARMACEUTICALS"

gen deal_year = year(dofq(acquisitionQrt))

merge m:1 deal_year using "${inputdir}\HSR_thresholds.dta" // all master is matched
keep if _merge == 3
drop _merge

gen medVH = (ValueofTransactionMil < high_threshold)
gen lowVH = (ValueofTransactionMil < low_threshold)
gen missVH = ValueofTransactionMil == .

* merge old_product indicators
merge 1:1 dealID using "${outdir}/old_prod_flag.dta", ///
	keep(master match) keepusing(all_old) nogen

replace all_old = 0 if all_old == .

* Merge with early termination indicator
merge 1:1 dealID using "${outdir}\early_termination_ind.dta", ///
	keep(master match) nogen	// some non-matched from using are financial deals
replace ea_ind = 0 if ea_ind == .

* generate "stealth" indicator (medVH and no ea_ind)
gen stlth = medVH == 1 & ea_ind == 0

** keep only relevant variables and save
keep dealID company_acq large_deal lg_acq sm_acq valeant ///
	AcquirorName TargetName ValueofTransactionMil num_products ///
	year1_drug_postAcq_net_sales tot_drug_postAcq_net_sales ///
	medVH tot_Acquiror_net_sales tot_Target_net_sales ea_ind stlth

label var company_acq "Indicator for company acquisition"
label var tot_drug_postAcq_net_sales "Post-acquisition total net sales (all drugs)"
label var year1_drug_postAcq_net_sales "Post-acquisition year-1 net sales (all drugs)"
label var tot_Acquiror_net_sales "Pre-acquisition net sales of acquiring company"
label var tot_Target_net_sales "Post-acquisition net sales of target company"
label var large_deal "Indicator for above-median post-acquisition year-1 invoice sales"
label var lg_acq "Indicator for acquiring company larger than target company"
label var sm_acq "Indicator for target company larger than acquiring company"
label var valeant "Indicator for Valeant acquisition"
label var medVH "Indicator for deal below higher HSR threshold"
label var ea_ind "Early termination indicator"
label var stlth "Stealth acquisition"

save "${maindir}\acquisition_characteristics.dta", replace





////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////





/////////////////////////////////////////////////////
/////											/////
/////	STEP 2. Get drug-specific information 	/////
/////											/////
/////////////////////////////////////////////////////


/* Blockbuster variables:
	
	BLOCKBUSTER_ACQUISITION -> this is an indicator for acquiring a blockbuster 
		drug (i.e. a drug that was or will be in the top 5% of yearly sales at 
		some point in its lifecycle) for a company that previously did not have 
		one.
	ACQUIRED_BY_BLOCKBUSTER -> this is an indicator for being acquired by a 
		company that owns a blockbuster drug (for drugs that were previously 
		owned by a company that did not own another blockbuster drug).
*/


*** Get a blockbuster (at the drug level)
use "${outdir}\ProductLevelSalesSSRwithAcquisition.dta", clear
drop if quarter >= qofd(LOE)	// exclude products post-exclusivity

gen year = yofd(dofq(quarter))

bysort Product year : egen yearly_net_sales = total(net_sales)
bysort Product year : gen indY = _n == 1

* Identify "blockbusters" (top 5% of net sales in any given calendar year)
gen ind = 0
forvalues y = 2007/2019 {
	sum yearly_net_sales if year == `y' & indY == 1, d
	local p95 = r(p95)
	replace ind = 1 if yearly_net_sales >= `p95' & yearly_net_sales < . & year == `y'
	}

bysort Product : egen blockbuster = max(ind)
drop ind

* duplicate observations for drugs that are associated to more than one company
rename mktCompany Company
split Company, parse(;)
foreach var of varlist Company* {
	replace `var' = trim(`var')
	}

rename Company mktCompany

reshape long Company, i(Product mktCompany quarter) j(companyTracker)
drop companyTracker
drop if Company == ""

* count Products by company
bysort Company quarter : egen num_blockbuster = total(blockbuster)
gen has_other_blockbuster = (num_blockbuster - blockbuster) > 0
gen has_blockbuster = num_blockbuster > 0	// this is at the company level

keep Company Product quarter blockbuster has_blockbuster has_other_blockbuster
duplicates drop

* condense to the product level
collapse (max) blockbuster has_blockbuster has_other_blockbuster, ///
	by(Product quarter)

* save
save "${outdir}/product_blockbusters.dta", replace


* Merge this information to acquirer and acquired products
* Merge the datasets that list products of firms involved in the deals so we can 
* ascertain their blockbuster status

use "${outdir}\deal_acquiror_list.dta", clear
rename Product Product1	// this is going to be the ACQUIRER product 
label var Product1 "Acquirer product"

joinby dealID using "${outdir}/deal_target_list.dta"
rename Product Product2 // this is going to be the TARGET product 
label var Product2 "Target product"

* merge to get blockbuster status for ACQUIRER products. We care about status in 
* the quarter BEFORE acquisition
gen quarter = acquisitionQrt - 1
rename Product1 Product

merge m:1 Product quarter using "${outdir}/product_blockbusters.dta", ///
	keep(master match) nogen keepusing(has_blockbuster has_other_blockbuster)

rename has_blockbuster has_blockbuster_Acquirer
rename has_other_blockbuster has_other_blockbuster_Acquirer

rename Product Product1

* merge again to get blockbuster status for TARGET products in the quarter 
* BEFORE acquisition (unmatched observations means that there is no sales data 
* for the acquired product in the quarters BEFORE acquisition)
rename Product2 Product

merge m:1 Product quarter using "${outdir}/product_blockbusters.dta", ///
	keep(master match) keepusing(has_blockbuster has_other_blockbuster)

rename Product Product2
drop quarter

rename has_blockbuster has_blockbuster_Target
rename has_other_blockbuster has_other_blockbuster_Target

* generate the blockbuster deal variables
gen acquired_by_blockbuster = has_blockbuster_Acquirer == 1 & has_other_blockbuster_Target == 0
gen blockbuster_acquisition = has_other_blockbuster_Acquirer == 0 & has_blockbuster_Target == 1

* Reshape to get a deal-Product level dataset (with acquirer and acquired status)
keep dealID acquisitionQrt Product1 Product2 ///
	blockbuster_acquisition acquired_by_blockbuster

* status is 1 for acquirer and 2 for acquired
gen id = _n 
reshape long Product, i(id) j(status)
drop id

* edit variables so they reflect status
replace acquired_by_blockbuster = 0 if status == 1 	// variable only applies to Target products
replace blockbuster_acquisition = 0 if status == 2	// variable only applies to Acquirer products

drop status
duplicates drop

* select variables and save
save "${outdir}\drug_blockbuster_measures.dta", replace


*** Overlap in therapeutic class

* The key contribution in this step is calculating the overlap in therapeutic 
* class between the set of products owned by the two companies that are part of
* each deal. This is a complex procedure that requires several steps.


* Merge the datasets that list products of firms involved in the deals to get 
* pairs of drugs whose therapeutic classes we can compare
use "${outdir}\deal_acquiror_list.dta", clear
rename Product Product1	// this is going to be the ACQUIRER product 
label var Product1 "Acquirer product"

drop if Product1 == ""	// these are the companies with no sales

joinby dealID using "${outdir}\deal_target_list.dta"
rename Product Product2 // this is going to be the TARGET product 
label var Product2 "Target product"

* Now merge with a dataset that contains similarity measures between drugs. 
* There will be a low match rate because the dataset only contains drugs that
* share at least the same ATC3.
merge m:1 Product1 Product2 using "${inputdir}\ssr_product_similarity_measures.dta", ///
	keep(master match) nogen

* some matches are due to missing info on ATC
drop if atc3 == "NA"
keep dealID acquisitionQrt Product1 Product2 same_atc3 same_atc4 atc3

* replace missing data with 0s (if the pair does not match it means they do not
* share an ATC3 code)
replace same_atc3 = 0 if same_atc3 == .
replace same_atc4 = 0 if same_atc4 == .

* save a temporary file that has the list of matched products
preserve
keep Product1 Product2 same_atc3 dealID acquisitionQrt atc3
drop if same_atc3 == 0
duplicates drop

save "${outdir}\overlap_pair_list.dta", replace
restore

* reshape to get a different variable for overlap by drug status (status is 1 
* for acquirer and 2 for acquired)
gen id = _n 
reshape long Product, i(id) j(status)

* add acquired/acquirer indicators
gen acqr_drug = status == 1
gen acqd_drug = status == 2
drop status

* generate separate indicators for same_atc3/4 and acquirer/acquired drugs
generate acqr_drug_same_atc3 = same_atc3 == 1 & acqr_drug == 1
generate acqr_drug_same_atc4 = same_atc4 == 1 & acqr_drug == 1

generate acqd_drug_same_atc3 = same_atc3 == 1 & acqd_drug == 1
generate acqd_drug_same_atc4 = same_atc4 == 1 & acqd_drug == 1

* now collapse at the Product-dealID-quarter level. We use max because the 
* overlap variable will be 1 even if there is only one overlapping match.
collapse (max) acqr* acqd*, by(dealID Product acquisitionQrt)

* save
save "${maindir}\drug_overlap_measures.dta", replace

* create dataset with owned-by-acquirer overlap metrics at the drug-YEAR level
gen year = yofd(dofq(acquisitionQrt))
keep Product year acqr_drug acqr_drug_same_atc3 acqr_drug_same_atc4
collapse (max) acqr_drug*, by(Product year)

save "${maindir}\acqr_drug_overlap_measures.dta", replace




////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////







/////////////////////////////////////////////////
/////										/////
/////	STEP 3. Get competition variables	/////
/////										/////
/////////////////////////////////////////////////


/* 	Get a dataset of ATC-4 overlaps (and ATC-3 overlaps). 
	Several versions:
		
		1.	Competing with acquired drugs (any ATC-4)
		2.	ATC-3 competitor of drug involved in an overlap merger
		3.	ATC-4 competitor of drug involved in an overlap merger
		4.	ATC-3 competitor of drug involved in a low-value overlap merger
		5.	ATC-4 competitor of drug involved in a low-value overlap merger
*/




***
*** Dataset 1: close competitor of acquired drug (ATC-4 level)
***

use "${outdir}\deal_target_list.dta", clear	// list of acquired drugs
rename Product Product2	// IMPORTANT: keep Product2 bc that's what we used as Target product in previous code

joinby Product2 using "${inputdir}\ssr_product_similarity_measures.dta"

keep if same_atc4 == 1	// these are the close matches we want

* now we want to drop matches that are on the overlap list (because these are 
* drugs involved in the merger)
merge 1:1 Product1 Product2 dealID acquisitionQrt ///
	using "${outdir}\overlap_pair_list.dta", keep(master) nogen

* now keep all matched Product1: these are drugs in the same ATC-4 as an 
* acquired drug
rename same_atc4 acq_in_atc4
label var acq_in_atc4 "ATC-4 competitor of acquired drug"
rename Product1 Product
collapse (max) acq_in_atc4, by(Product dealID acquisitionQrt)

* collapse at the Product-quarter level (sum indicators if necessary)
collapse (sum) acq_in_atc4, by(Product acquisitionQrt)

* save
save "${outdir}\drug_acq_in_class_1.dta", replace


***
*** Dataset 2: ATC-3 competitor in class with ATC-3 overlap acquisition
***

* first get a list of ATC-3 that experienced consolidation
use "${outdir}\overlap_pair_list.dta", clear
merge m:1 Product1 Product2 using "${inputdir}\ssr_product_similarity_measures.dta", ///
	keep(master match) nogen

* some matches are due to missing info on ATC
drop if atc3 == "NA"
keep if same_atc3 == 1

keep atc3 dealID acquisitionQrt	// this is a list of ATC-3 codes where 
								// consolidation happened, with dealID and 
								// quarter of acquisition
duplicates drop

tempfile atc3list
save "`atc3list'", replace

* merge this data back into the product_similarity_measures data. This gives us 
* a list of all drugs in those ATC-3 codes
use "${inputdir}\ssr_product_similarity_measures.dta", clear

keep Product1 atc3
duplicates drop

joinby atc3 using "`atc3list'"

rename Product1 Product
gen ov_acq_in_atc3 = 1
label var ov_acq_in_atc3 "Overlap acquisition in ATC-3"

* now remove drugs involved in the actual acquisition, so that the drugs that 
* are left are drugs not involved, but just competing with drugs that are 
* involved

merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_target_list.dta", keep(master) nogen
	
merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_acquiror_list.dta", keep(master) nogen
	
* collapse at the acquisition quarter level
collapse (sum) ov_acq_in_atc3, by(Product acquisitionQrt)

* save
save "${outdir}\drug_acq_in_class_2.dta", replace



***
*** Dataset 3: ATC-4 competitor in class with ATC-3 overlap acquisition
***


* Get the ATC-4 code of all drugs involved in overlap acquisitions
use "${outdir}\overlap_pair_list.dta", clear
merge m:1 Product1 Product2 using "${inputdir}\ssr_product_similarity_measures.dta", ///
	keep(master match) nogen

* some matches are due to missing info on ATC
drop if atc3 == "NA"
keep if same_atc3 == 1

keep dealID acquisitionQrt atc4_1 atc4_2

bysort dealID acquisitionQrt : gen temp = _n

reshape long atc4_, i(dealID acquisitionQrt temp) j(n) 

drop temp n
rename atc4_ atc4

duplicates drop 	// this is a list of ATC-4 codes that have products involved
					// in an overlap merger, with dealID and quarter of 
					// acquisition

tempfile atc4list
save "`atc4list'", replace

* Now get a list of all drugs in those ATC-4 codes
use "${inputdir}\ssr_product_similarity_measures.dta", clear

keep Product1 Product2 atc4_1 atc4_2
egen temp = seq()
reshape long Product atc4_, i(temp) j(n)
drop temp n

rename atc4_ atc4

duplicates drop

joinby atc4 using "`atc4list'"

gen ov_acq_atc4 = 1
label var ov_acq_atc4 "ATC-4 competitor in overlap acquisition"


* now remove drugs involved in the actual acquisition, so that the drugs that 
* are left are drugs not involved, but just competing with drugs that are 
* involved
merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_target_list.dta", keep(master) nogen
	
merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_acquiror_list.dta", keep(master) nogen


* collapse at the acquisition quarter level
collapse (sum) ov_acq_atc4, by(Product acquisitionQrt)


* save
save "${outdir}\drug_acq_in_class_3.dta", replace



***
*** Dataset 4: drugs competing in classes with stealth horizontal acquisition
***

* First get the list of stealth acquisitions
use "${maindir}\acquisition_characteristics.dta", clear
keep if stlth == 1
keep dealID

* Next, repeat the same steps as before. Get a list of ATC-3 that experienced 
* consolidation from low-value deals
merge 1:m dealID using "${outdir}\overlap_pair_list.dta", keep(match) nogen

merge m:1 Product1 Product2 using "${inputdir}\ssr_product_similarity_measures.dta", ///
	keep(master match) nogen

* some matches are due to missing info on ATC
drop if atc3 == "NA"
keep if same_atc3 == 1

keep atc3 dealID acquisitionQrt	// this is a list of ATC-3 codes where 
								// consolidation happened, with dealID and 
								// quarter of acquisition
duplicates drop

tempfile atc3list
save "`atc3list'", replace

* merge this data back into the product_similarity_measures data. This gives us 
* a list of all drugs in those ATC-3 codes
use "${inputdir}\ssr_product_similarity_measures.dta", clear

keep Product1 atc3
duplicates drop

joinby atc3 using "`atc3list'"

rename Product1 Product
gen stlth_ov_acq_in_atc3 = 1
label var stlth_ov_acq_in_atc3 "stealth overlap acquisition in ATC-3"

* now remove drugs involved in the actual acquisition, so that the drugs that 
* are left are drugs not involved, but just competing with drugs that are 
* involved

merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_target_list.dta", keep(master) nogen
	
merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_acquiror_list.dta", keep(master) nogen
	
* collapse at the acquisition quarter level
collapse (sum) stlth_ov_acq_in_atc3, by(Product acquisitionQrt)

* save
save "${outdir}\drug_acq_in_class_4.dta", replace



***
*** Dataset 5: drugs competing with drugs that experienced stealth horizontal acquisitions
***

* First get the list of low-value deals
use "${maindir}\acquisition_characteristics.dta", clear
keep if stlth == 1
keep dealID

* Get the ATC-4 code of all drugs involved in overlap acquisitions
merge 1:m dealID using "${outdir}\overlap_pair_list.dta", keep(match) nogen
merge m:1 Product1 Product2 using "${inputdir}\ssr_product_similarity_measures.dta", ///
	keep(master match) nogen

* some matches are due to missing info on ATC
drop if atc3 == "NA"
keep if same_atc3 == 1

keep dealID acquisitionQrt atc4_1 atc4_2

bysort dealID acquisitionQrt : gen temp = _n

reshape long atc4_, i(dealID acquisitionQrt temp) j(n) 

drop temp n
rename atc4_ atc4

duplicates drop 	// this is a list of ATC-4 codes that have products involved
					// in an overlap merger, with dealID and quarter of 
					// acquisition

tempfile atc4list
save "`atc4list'", replace

* Now get a list of all drugs in those ATC-4 codes
use "${inputdir}\ssr_product_similarity_measures.dta", clear

keep Product1 Product2 atc4_1 atc4_2
egen temp = seq()
reshape long Product atc4_, i(temp) j(n)
drop temp n

rename atc4_ atc4

duplicates drop

joinby atc4 using "`atc4list'"

gen stlth_ov_acq_atc4 = 1
label var stlth_ov_acq_atc4 "ATC-4 competitor in stealth overlap acquisition"


* now remove drugs involved in the actual acquisition, so that the drugs that 
* are left are drugs not involved, but just competing with drugs that are 
* involved
merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_target_list.dta", keep(master) nogen
	
merge m:1 Product dealID acquisitionQrt ///
	using "${outdir}\deal_acquiror_list.dta", keep(master) nogen


* collapse at the acquisition quarter level
collapse (sum) stlth_ov_acq_atc4, by(Product acquisitionQrt)

* save
save "${outdir}\drug_acq_in_class_5.dta", replace
